## *********************************
##      Table 2 Individual Survey      
##        Hao Zhang & Ye Zhang
##             2025/7/24
## *********************************

library(readstata13)
library(lfe)
library(stargazer)
library(tidyverse)
library(readxl)

## Read in city data
setwd("../raw data")
city <- read_excel("city panel 2010-2019.xlsx")
city <- city %>% select(city, year, gdp_growth, gdp, gdppc, fiscal_inc, fiscal_exp, FDI, hosp, unemp, pop, tax_absc_firm) %>%
  filter(year > 2010 & year < 2016) %>% as.data.frame()

## Read in citycode for matching
citycode <- read_excel("citycode.xlsx") %>% distinct()
citycode <- citycode[!duplicated(citycode[c("city")]), ]
city <- left_join(city, citycode, by = "city") %>% select(-city)
city <- city %>% mutate(across(where(is.character), ~parse_number(.)))

## calculate gdp pressure in the same city
city <- city %>% 
  mutate(city4 = as.integer(citycode/100)) %>% 
  filter(city4 != 1100, city4 != 1200, city4 != 3100, city4 != 5102) %>%
  filter(substr(as.character(city4), 3, 4) != "01") %>%
  mutate(province = as.integer(citycode/10000)) %>% 
  group_by(province, year) %>%
  mutate(gdpgrowth_mean = mean(gdp_growth, na.rm = TRUE)) %>%
  mutate(gdp_pressure = gdpgrowth_mean - gdp_growth)

## read in cfps data
load("../raw data/cfps 2012-2017.RData")

## merge with city covariates
city <- city %>% mutate(year = year + 1)
data <- data %>% mutate(city4 = as.integer(code/100), year = as.numeric(year))
data <- left_join(data, city, by = c("city4", "year"))

## merge with mayor covariates
mayor <- read.dta13("city mayor 1990-2015.dta") %>% rename(city4 = citycode) %>%
  select(city4, year, code, begin, end, educ, ethnicity, gender, birth_year, seq)
data <- left_join(data, mayor, by = c("city4", "year"))

# create variables
data$id <- data %>% group_by(city4, year) %>% group_indices()
data$deficit <- data$fiscal_exp - data$fiscal_inc
data$gdp_pressure <- data$gdp_pressure/100

# save Rdata
rm(list=setdiff(ls(), "data"))
save.image("../analysis data/individual analysis.RData")

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# run regressions and report the results
m1 <- felm(unemployment ~ gdp_pressure + ihs(gdp_growth) + log(gdppc) + ihs(deficit/gdp) + ihs(tax_absc_firm/gdp) + log(FDI + 1)  + log(unemp/pop/10000) | 
             city4 + year + pid + begin + educ.y + ethnicity + gender | 0 | city4, data = data)

m2 <- felm(medical ~ gdp_pressure + ihs(gdp_growth) + log(gdppc) + ihs(deficit/gdp) + ihs(tax_absc_firm/gdp) + log(FDI + 1) + log(hosp/pop/10000) | 
             city4 + year + pid + begin + educ.y + ethnicity + gender | 0 | city4, data = data)

m3 <- felm(pension ~ gdp_pressure + ihs(gdp_growth) + log(gdppc) + ihs(deficit/gdp) + ihs(tax_absc_firm/gdp) + log(FDI + 1) + log(hosp/pop/10000) | 
             city4 + year + pid + begin + educ.y + ethnicity + gender | 0 | city4, data = data)

# output regression table (omit unemployment rates and medical resources)
stargazer(m1, m2, m3, title = "Interjurisdictional Competition and Social Insurances (Individual Level)", 
          dep.var.labels = c("Social Insurance Participation"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y"),
                           c("Individual Fixed Effects", "Y", "Y", "Y"),
                           c("City Fixed Effects", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y")))
